LAGOS Analysis

Loading in data

First download the LAGOS data, then grab the locus table (the lake centroid latitudes and longitudes)

# Download the LAGOSNE data to the package's default data path
lagosne_get(dest_folder = LAGOSNE:::lagos_path())
## Warning in lagosne_get(dest_folder = LAGOSNE:::lagos_path()): LAGOSNE data for this version already exists on the local machine.
##   Re-download if neccessary using the 'overwrite` argument.'
# Load the downloaded LAGOSNE data
lagos <- lagosne_load()
## Warning in `_f`(version = version, fpath = fpath): LAGOSNE version
## unspecified, loading version: 1.087.3
# Pull out the locus table, which holds the lake centroid info
lake_centers <- lagos[["locus"]]

# Convert the lake centroids to an sf point layer (long/lat columns, EPSG:4326)
spatial_lakes <- lake_centers %>%
  st_as_sf(coords = c("nhd_long", "nhd_lat"), crs = 4326)

# Pull out the water quality table
nutr <- lagos[["epi_nutr"]]

# Inspect the available columns
# names(nutr)

Subset the columns of nutr to keep only the key information we want

# Keep the lake id, sample date and the three clarity-related variables,
# then re-parse the sample date from its character form with ymd()
clarity_only <- nutr %>%
  select(lagoslakeid, sampledate, chla, doc, secchi) %>%
  mutate(sampledate = ymd(as.character(sampledate)))

Keep sites with at least 200 observations

# Look at the number of rows of the dataset
# nrow(clarity_only)

# Drop every observation that is missing either chla or secchi
chla_secchi <- clarity_only %>%
  filter(!is.na(chla),
         !is.na(secchi))

# How many observations did we lose?
# nrow(clarity_only) - nrow(chla_secchi)


# Keep only the lakes with at least 200 observations of secchi and chla.
# Count from chla_secchi (not clarity_only) so that rows missing chla or
# secchi do not count toward the 200-observation threshold.
chla_secchi_200 <- chla_secchi %>%
  group_by(lagoslakeid) %>%
  mutate(count = n()) %>%
  # ">= 200" matches "at least 200"
  filter(count >= 200) %>%
  # Drop the grouping so later steps start from an ungrouped table
  ungroup()

# Look at unique sites
# length(unique(chla_secchi_200$lagoslakeid))

Join water quality data to spatial data

# inner_join - a lake is kept only if it appears in both datasets.
# distinct() keeps one row per lake so each lake is joined exactly once.
spatial_200 <- inner_join(
  spatial_lakes,
  chla_secchi_200 %>%
    # TRUE rather than T: T is an ordinary variable that can be reassigned
    distinct(lagoslakeid, .keep_all = TRUE),
  by = "lagoslakeid"
)

# mapview(spatial_200)

Mean Chl_a map

### Take the mean chl_a and secchi by lake

mean_values_200 <- chla_secchi_200 %>%
  # One summary row per lake id
  group_by(lagoslakeid) %>%
  # Mean chl_a and mean secchi per lake, ignoring missing values
  # (TRUE rather than T: T is an ordinary variable that can be reassigned)
  summarize(mean_chl = mean(chla, na.rm = TRUE),
            mean_secchi = mean(secchi, na.rm = TRUE)) %>%
  # Drop lakes where either mean could not be computed
  filter(!is.na(mean_chl),
         !is.na(mean_secchi)) %>%
  # Take the log base 10 of the mean_chl
  mutate(log10_mean_chl = log10(mean_chl))

# Join the per-lake means to the lake point locations
mean_spatial <- inner_join(spatial_lakes, mean_values_200,
                           by = "lagoslakeid")

# Map the lakes, coloured by log10 mean chl_a
mapview(mean_spatial, zcol = "log10_mean_chl")

Class work

1) What is the correlation between Secchi Disk Depth and Chlorophyll a for

sites with at least 200 observations?

  • Here, I just want a plot of chla vs secchi for all sites
# Graph Chl a (x axis) vs Secchi disk depth (y axis).
# Columns are referenced by bare name inside aes() rather than with
# chla_secchi_200$..., and the axis labels match the mapped variables.
ggplot(data = chla_secchi_200, aes(x = chla, y = secchi)) +
  geom_point() +
  theme_few() +
  xlab("Chl A Conc") +
  ylab("Secchi Disk Depth")
## Warning: Removed 419421 rows containing missing values (geom_point).

#When clarity is low (i.e. shallow Secchi depth), Chlorophyll A concentration is high. Secchi depth decreases roughly exponentially as Chlorophyll A concentration increases.

Why might this be the case?

This could be because as the Chl A concentration increases, the water becomes greener and has more algae growing in it. The greenness and algae decrease the clarity of the water.

2) What states have the most data?

2a) First you will need to make a lagos spatial dataset that has the total

number of counts per site.

# Attach every retained water quality observation to its lake location
wq_spatial <- spatial_lakes %>%
  inner_join(chla_secchi_200, by = "lagoslakeid")

2b) Second, you will need to join this point dataset to the us_boundaries data.

# State polygons from us_states()
states <- us_states()

# Spatially join the state attributes onto each observation point
wq_spatial_state <- wq_spatial %>%
  st_join(states)
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar

2c) Then you will want to group by state and sum all the observations in that

state and arrange that data from most to least total observations per state.

# Add the number of observations in each state to every row, then order
# the rows from the state with the most observations to the least
count_spatial_state <- wq_spatial_state %>%
  group_by(state_name) %>%
  mutate(state_count = n()) %>%
  arrange(desc(state_count))

# Top 3 States include: Minnesota, Wisconsin, and Michigan

##3 Is there a spatial pattern in Secchi disk depth for lakes with at least 200 observations?

# Add the log base 10 of the mean secchi depth to the per-lake means
mean_values_200_secchi <- mean_values_200 %>%
  mutate(log10_mean_secchi = log10(mean_secchi))

# Join the per-lake means to the lake point locations
mean_spatial_2 <- inner_join(spatial_lakes, mean_values_200_secchi,
                             by = "lagoslakeid")

# Map the lakes, coloured by log10 mean secchi depth
mapview(mean_spatial_2, zcol = "log10_mean_secchi")
# It appears that Secchi disk depth follows the same pattern as Chl A concentration. Depths are greater on the East Coast than in the Great Lakes region.